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EXHIBIT A 


o 



#!/internet/bin/perl5.002 -w



# Copyright (c) 1998 

# Eugene Wang 


# *** BEGIN *** 


# 


#input sequence (File 0) to compare 


# 


# Read the enzyme description file (File 0, $ARGV[0]).
#
# Three command-line arguments are required: the enzyme file, the input
# FASTA file and the output file.  Six lines are consumed from the enzyme
# file, in this order:
#   1. enzyme 1 sequence            -> $Elsequence (upper-cased)
#   2. enzyme 1 extension value     -> $ElExtLoc
#   3. enzyme 2 sequence            -> $E2sequence (reversed, upper-cased)
#   4. enzyme 2 extension value     -> $E2ExtLoc
#   5. smallest segment size kept   -> $ElSizeStart
#   6. largest segment size kept    -> $ElSizeEnd
# All variables are package globals because later parts of the script
# read them directly.
if ($#ARGV < 2) {die "Usage: $0 enzyme_file fasta_file output_file\n";}

open(EnzymeInput, $ARGV[0]) || die "Cannot open input file $ARGV[0]: $!";

# Read all six lines first so that a truncated file is reported once,
# instead of producing a run of "uninitialized value" warnings under -w.
#print "Input Enzyme 1 sequence = ";
$Elsequence  = <EnzymeInput>;
$ElExtLoc    = <EnzymeInput>;
#print "Input Enzyme 2 sequence = ";
$E2sequence  = <EnzymeInput>;
$E2ExtLoc    = <EnzymeInput>;
$ElSizeStart = <EnzymeInput>;
$ElSizeEnd   = <EnzymeInput>;

# Reads are sequential, so if the last one succeeded they all did.
defined($ElSizeEnd) || die "Enzyme file $ARGV[0] must contain 6 lines";

chomp($Elsequence, $ElExtLoc, $E2sequence, $E2ExtLoc, $ElSizeStart, $ElSizeEnd);

# Enzyme 1: length and upper-case form of the sequence.
$lenElSeq = length($Elsequence);
$Elsequence =~ tr/a-z/A-Z/;
$lenElTotal = $lenElSeq + $ElExtLoc;

# Enzyme 2: the sequence is stored reversed (scalar-context reverse), then
# upper-cased.
$E2sequence = reverse($E2sequence);
$lenE2Seq = length($E2sequence);
$E2sequence =~ tr/a-z/A-Z/;
$lenE2Total = $lenE2Seq + $E2ExtLoc;

# Difference between the two extension values; applied to the segment
# boundaries when the selected segments are printed.
$lenElExtra = $E2ExtLoc - $ElExtLoc;


# 


#open input FASTA file (File 1) 



# 


#print "Input file name = "; 
#$fname = <>; 
# chomp $fname; 

#$fname = "H_DJ0167F23 . seq" ; 

# Open the input FASTA file (File 1, $ARGV[1]) for reading.  The system
# error text ($!) is included so a failure explains itself.
open(Infile, $ARGV[1]) || die "Cannot open input file $ARGV[1]: $!";

#
#open output file (File 2)
#
# The file named by $ARGV[2] is created or truncated.
open(Outfile, ">$ARGV[2]") || die "Cannot open output file $ARGV[2]: $!";
#open (Outfile, ">output.txt");
# NOTE(review): the next statement is read as commented out in the printed
# listing (its leading character is garbled) -- confirm against the source.
#print Outfile "Qualifier\tSequence";

# 


#read input FASTA file 
# 


# Read the FASTA header line, echo it to the output file, and derive
# $FragmentID from it.
$line = <Infile>; #header line
defined($line) || die "Input file $ARGV[1] is empty";

print Outfile "$line";
$linecount = 0;
$FullSeq = "";

#
#check headerline format
#
# Two layouts are recognised:
#   - '|'-separated with more than three fields: the ID is the 4th field;
#   - otherwise: the ID is the first space-separated word, without the
#     leading '>'.
chomp $line;

@fields = split(/\|/, $line);
$ntokens = scalar(@fields);

if ($ntokens > 3)
{
    $FragmentID = $fields[3];
}
else
{
    # Join a "> name" header to ">name" so the first word carries the ID.
    $line =~ s/^> />/;

    @fields = split(/ /, $line);
    $ntokens = scalar(@fields);

    if ($ntokens > 0)
    {
        $FragmentID = $fields[0];
        $FragmentID =~ s/^>//;
    }
    else
    {
        $FragmentID = "UnknownFragment";
    }

    # A header that is only ">" would otherwise give an empty ID.
    if ($FragmentID eq "") {$FragmentID = "UnknownFragment";}
}

while ($line = <Infile>) #read in a line 

{ 



# NOTE(review): the statements below end with a `return` and a closing
# brace, so they appear to be the tail of a subroutine whose opening line
# is not present in this listing -- confirm against the original source.
#
# What the visible statements do:
#   1. print a description of both enzymes and the accepted size range;
#   2. scan $FullSeq for enzyme 1 / enzyme 2 matches, recording a left and
#      a right boundary (and the enzyme type, 1 or 2) for every segment;
#   3. keep the segments whose size lies in [$ElSizeStart, $ElSizeEnd];
#   4. print each kept segment (header line plus sequence) to Outfile and
#      to standard output;
#   5. return $lenFullSeq.
# $FullSeq and $lenFullSeq are not assigned in the visible lines.

print Outfile "Enzyme top strand: ";
print Outfile "(5\'-$Elsequence";
if ($ElExtLoc > 0) {print Outfile "(N)$ElExtLoc";}
print Outfile "-3\')";
print Outfile "\n";

print Outfile "Enzyme bottom strand: ";
print Outfile "(5\'-";
if ($E2ExtLoc > 0) {print Outfile "(N)$E2ExtLoc";}
print Outfile "$E2sequence-3\')";
print Outfile " or ";

# $E2sequence is stored reversed; reverse it again for the 3'->5' form.
my $ts = reverse($E2sequence);

print Outfile "(3\'-$ts";
if ($E2ExtLoc > 0) {print Outfile "(N)$E2ExtLoc";}
print Outfile "-5\')";
print Outfile "\n";

print Outfile "Segment size: $ElSizeStart - $ElSizeEnd\n";

$minLen = $lenElTotal < $lenE2Total ? $lenElTotal : $lenE2Total;
$maxLen = $lenElTotal > $lenE2Total ? $lenElTotal : $lenE2Total;

$nMatchEl = 0;
$nSelected = 0;
@EnzLocLeft = ();
@EnzLocRight = ();
@EnzTypeLeft = ();
@EnzTypeRight = ();

if ($minLen > 0)
{
    # for ($i=0; $i <= $lenFullSeq-$lenElSeq; $i++)
    for ($i = 0; $i <= $lenFullSeq - $maxLen; $i++)
    {
        if (substr($FullSeq, $i, $lenElSeq) eq $Elsequence)
        {
            # $EnzLocLeft[$nMatchEl] = $i + $lenElTotal;
            ##have to use push()
            # $EnzTypeLeft[$nMatchEl] = 1;
            push(@EnzLocLeft, $i + $lenElTotal);
            push(@EnzTypeLeft, 1);

            # print Outfile "$nMatchEl\t$i\t";
            # print Outfile "type 1\t";
            # print Outfile "$Elsequence\t";
            # print Outfile substr($FullSeq, $i, $lenElTotal);
            # print Outfile "\n";

            # Every match after the first also closes the previous segment.
            if ($nMatchEl > 0)
            {
                push(@EnzLocRight, $i + $lenElTotal - 1);
                push(@EnzTypeRight, 1);
            }
            $nMatchEl++;
        }
        # if (substr($FullSeq, $i+$E2ExtLoc, $lenE2Seq) eq $E2sequence)
        elsif (substr($FullSeq, $i + $E2ExtLoc, $lenE2Seq) eq $E2sequence)
        {
            # $EnzLocLeft[$nMatchEl] = $i;
            # $EnzCutLeft[$nMatchEl] = 2;
            push(@EnzLocLeft, $i);
            push(@EnzTypeLeft, 2);

            # print Outfile "$nMatchEl\t$i\t";
            # print Outfile "type 2\t";
            # print Outfile "$E2sequence\t";
            # print Outfile substr($FullSeq, $i, $lenE2Total);
            # print Outfile "\n";

            if ($nMatchEl > 0)
            {
                push(@EnzLocRight, $i - 1);
                push(@EnzTypeRight, 2);
            }
            $nMatchEl++;
        }
    }

    # Close the last open segment at the final scan position so that both
    # boundary arrays hold $nMatchEl entries.
    if ($nMatchEl > 0)
    {
        push(@EnzLocRight, $i - 1);
        push(@EnzTypeRight, 2);
    }

    print Outfile "Number of segments: $nMatchEl\n";
    # Report the element count (the value actually compared), not the
    # last index.
    if ($nMatchEl != ($#EnzLocRight + 1))
    {
        die("Counting error...nMatchEl ($nMatchEl) != " . scalar(@EnzLocRight));
    }

    print Outfile "Matched loci:\n";
    for ($i = 0; $i < $nMatchEl; $i++)
    {
        print Outfile "$EnzLocLeft[$i]\t";
    }

    print Outfile "\nSegment Size:\n";
    # The segment opened by the last match is not examined here.
    for ($i = 0; $i < $nMatchEl - 1; $i++)
    {
        $tmpSegSize = $EnzLocRight[$i] - $EnzLocLeft[$i] + 1;
        if ($tmpSegSize >= $ElSizeStart && $tmpSegSize <= $ElSizeEnd)
        {
            $SegSelected[$nSelected++] = $i;
        }
        print Outfile "$tmpSegSize\t";
    }
}

##
## print out the Segment (El) sequences
##
print Outfile "\nSegments Selected ($nSelected): ";
for ($i = 0; $i < $nSelected; $i++)
{
    $selSeq = $SegSelected[$i];
    $Elleft = $EnzLocLeft[$selSeq];
    $Elright = $EnzLocRight[$selSeq];

    # Shift one boundary by the difference between the two extension
    # values: the right one when it is positive, the left one otherwise.
    if ($lenElExtra > 0) {$Elright += $lenElExtra;}
    else {$Elleft += $lenElExtra;}
    $lenSelSeq = $Elright - $Elleft + 1;

    $OutputHeaderLine = ">" . $FragmentID . "_" . $selSeq . "\tsize=" . $lenSelSeq;
    $OutputHeaderLine .= "\tLoci=" . $Elleft . "-" . $Elright;
    $OutputHeaderLine .= "\tEnz$EnzTypeLeft[$selSeq]-Enz$EnzTypeRight[$selSeq]";

    print Outfile "\n$OutputHeaderLine";
    print "$OutputHeaderLine";

    # Segment sequence
    $SeqEltoNextEl = substr($FullSeq, $Elleft, $lenSelSeq);
    print Outfile "\n$SeqEltoNextEl\n";
    print "\n$SeqEltoNextEl\n";
}

return ($lenFullSeq);
}
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EXHIBIT B 


#!/internet/bin/perl5.002 -w 

#********************************************

# Copyright (c) 1998 

# Author: Eugene Wang 

# Title: Ligate 

# Purpose: Find matching segments /sequences in two files 
# Main program.
#
# Usage: ligate adapter_file input_file output_file
#   adapter_file : line 1 = offset of the adapter from either end of a
#                  sequence, line 2 = adapter sequence
#   input_file   : records made of a header line (first character '#' or
#                  '>') followed by one or more sequence lines
#   output_file  : receives every record whose sequence Ligate() accepts
#
# Exactly three arguments are required.  The count is formatted outside
# the string: "$#ARGV+1" inside quotes would print a literal "+1".
if (@ARGV != 3) {die "Number of argv (" . scalar(@ARGV) . ") != 3";}

#
#input file
#
open(InfileLigate, $ARGV[0]) or die "Open error ... $ARGV[0]: $!\n";

$locLigate = <InfileLigate>;
$seqLigate = <InfileLigate>;
# Both lines are required; the second read is undef if either is missing.
defined($seqLigate) or die "Read error ... $ARGV[0] must contain 2 lines\n";
chomp $locLigate;
chomp $seqLigate;
close(InfileLigate);

#
#output file
#
open(Infile, $ARGV[1]) or die "Open error ... $ARGV[1]: $!\n";
$OutName = $ARGV[2];

open(Outfile, ">$OutName") or die("Open error ... $OutName: $!");

$alreadyReadOne = 0;    # becomes 1 once the first header line is seen
$sequence = "";         # sequence lines of the current record, joined

# defined() keeps a final line consisting of "0" from ending the loop.
while (defined($line = <Infile>)) #read in a line
{
    chomp $line;

    next if ($line eq "");

    if ($line =~ /^#/ || $line =~ /^>/) ##if first char is '#' or '>'
    {
        # A new header closes the previous record: test it and write it
        # out if it passes.
        if ($alreadyReadOne == 1) {
            if (&Ligate($sequence, $locLigate, $seqLigate) == 1) {
                print Outfile "$headerLine\n";
                print Outfile "$sequence\n";
            }

            $sequence = "";
        }

        $headerLine = $line;
        $alreadyReadOne = 1;
    }
    else
    {
        $sequence .= $line;
    }
}

# The last record has no following header, so it is handled here.
if ($alreadyReadOne == 1) {
    if (&Ligate($sequence, $locLigate, $seqLigate) == 1) {
        print Outfile "$headerLine\n";
        print Outfile "$sequence\n";
    }
}

close(Infile);
# Buffered write errors only surface when the handle is closed.
close(Outfile) or die "Close error ... $OutName: $!";


##########################################################################
#compare sequence with Ligation Adapter sequence
##########################################################################
# Ligate($seq, $locLigate, $seqLigate)
#
# Tests whether the adapter $seqLigate occurs in $seq at offset $locLigate
# from the start AND at the mirrored position, i.e. ending $locLigate
# characters before the end of $seq.
#
#   $seq       : sequence to test
#   $locLigate : offset of the adapter from either end (0 = at the end)
#   $seqLigate : adapter sequence; compared with `eq`, so case-sensitive
#
# Returns 1 when both positions match, 0 otherwise.  A sequence too short
# to hold the adapter at the given offset returns 0.
#
# No prototype is declared: the sub takes three arguments, which an empty
# `()` prototype would forbid for any call that honours prototypes.
sub Ligate
{
    # `my`, not `local`: these are private temporaries, and `local` would
    # overwrite the caller's package globals of the same names for the
    # duration of the call.
    my ($seq, $locLigate, $seqLigate) = @_;

    my $lenLigate = length($seqLigate);
    my $lenSeq = length($seq);

    # Keep both substr() calls inside the string; otherwise substr()
    # warns under -w and returns undef.
    if ($locLigate < 0 || $locLigate + $lenLigate > $lenSeq) {
        return 0;
    }

    my $tailStart = $lenSeq - $locLigate - $lenLigate;

    if (substr($seq, $locLigate, $lenLigate) eq $seqLigate
        && substr($seq, $tailStart, $lenLigate) eq $seqLigate) {
        return 1;
    }

    return 0;
}
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